{
 "metadata": {
  "name": "",
  "signature": "sha256:af14713612af6df3af27b7546e7130b724231374a9fb48b739f50bf0b51b776c"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "heading",
     "level": 1,
     "metadata": {},
     "source": [
      "Time series"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "from __future__ import division\n",
      "from pandas import Series, DataFrame\n",
      "import pandas as pd\n",
      "from numpy.random import randn\n",
      "import numpy as np\n",
      "pd.options.display.max_rows = 12\n",
      "np.set_printoptions(precision=4, suppress=True)\n",
      "import matplotlib.pyplot as plt\n",
      "plt.rc('figure', figsize=(12, 4))"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "%cd ../book_scripts"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "%matplotlib inline"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "heading",
     "level": 2,
     "metadata": {},
     "source": [
      "Date and Time Data Types and Tools"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "from datetime import datetime\n",
      "now = datetime.now()\n",
      "now"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "now.year, now.month, now.day"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "delta = datetime(2011, 1, 7) - datetime(2008, 6, 24, 8, 15)\n",
      "delta"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "delta.days"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "delta.seconds"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "from datetime import timedelta\n",
      "start = datetime(2011, 1, 7)\n",
      "start + timedelta(12)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "start - 2 * timedelta(12)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "heading",
     "level": 3,
     "metadata": {},
     "source": [
      "Converting between string and datetime"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "stamp = datetime(2011, 1, 3)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "str(stamp)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "stamp.strftime('%Y-%m-%d')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "value = '2011-01-03'\n",
      "datetime.strptime(value, '%Y-%m-%d')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "datestrs = ['7/6/2011', '8/6/2011']\n",
      "[datetime.strptime(x, '%m/%d/%Y') for x in datestrs]"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "from dateutil.parser import parse\n",
      "parse('2011-01-03')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "parse('Jan 31, 1997 10:45 PM')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "parse('6/12/2011', dayfirst=True)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "datestrs"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "pd.to_datetime(datestrs)\n",
      "# note: output changed (no '00:00:00' anymore)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "idx = pd.to_datetime(datestrs + [None])\n",
      "idx"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "idx[2]"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "pd.isnull(idx)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "heading",
     "level": 2,
     "metadata": {},
     "source": [
      "Time Series Basics"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "from datetime import datetime\n",
      "dates = [datetime(2011, 1, 2), datetime(2011, 1, 5), datetime(2011, 1, 7),\n",
      "         datetime(2011, 1, 8), datetime(2011, 1, 10), datetime(2011, 1, 12)]\n",
      "ts = Series(np.random.randn(6), index=dates)\n",
      "ts"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "type(ts)\n",
      "# note: output changed to \"pandas.core.series.Series\""
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ts.index"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ts + ts[::2]"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ts.index.dtype\n",
      "# note: output changed from dtype('datetime64[ns]') to dtype('<M8[ns]')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "stamp = ts.index[0]\n",
      "stamp\n",
      "# note: output changed from <Timestamp: 2011-01-02 00:00:00> to Timestamp('2011-01-02 00:00:00')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "heading",
     "level": 3,
     "metadata": {},
     "source": [
      "Indexing, selection, subsetting"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "stamp = ts.index[2]\n",
      "ts[stamp]"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ts['1/10/2011']"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ts['20110110']"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "longer_ts = Series(np.random.randn(1000),\n",
      "                   index=pd.date_range('1/1/2000', periods=1000))\n",
      "longer_ts"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "longer_ts['2001']"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "longer_ts['2001-05']"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ts[datetime(2011, 1, 7):]"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ts"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ts['1/6/2011':'1/11/2011']"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ts.truncate(after='1/9/2011')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# 100 weekly (Wednesday-anchored) rows of random data, one column per state\n",
      "dates = pd.date_range('1/1/2000', periods=100, freq='W-WED')\n",
      "long_df = DataFrame(np.random.randn(100, 4),\n",
      "                    index=dates,\n",
      "                    columns=['Colorado', 'Texas', 'New York', 'Ohio'])\n",
      "# label-based .loc replaces the deprecated .ix indexer;\n",
      "# the partial date string '5-2001' selects every row falling in May 2001\n",
      "long_df.loc['5-2001']"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "heading",
     "level": 3,
     "metadata": {},
     "source": [
      "Time series with duplicate indices"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "dates = pd.DatetimeIndex(['1/1/2000', '1/2/2000', '1/2/2000', '1/2/2000',\n",
      "                          '1/3/2000'])\n",
      "dup_ts = Series(np.arange(5), index=dates)\n",
      "dup_ts"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "dup_ts.index.is_unique"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "dup_ts['1/3/2000']  # not duplicated"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "dup_ts['1/2/2000']  # duplicated"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "grouped = dup_ts.groupby(level=0)\n",
      "grouped.mean()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "grouped.count()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "heading",
     "level": 2,
     "metadata": {},
     "source": [
      "Date ranges, Frequencies, and Shifting"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ts"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ts.resample('D')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "heading",
     "level": 3,
     "metadata": {},
     "source": [
      "Generating date ranges"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "index = pd.date_range('4/1/2012', '6/1/2012')\n",
      "index"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "pd.date_range(start='4/1/2012', periods=20)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "pd.date_range(end='6/1/2012', periods=20)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "pd.date_range('1/1/2000', '12/1/2000', freq='BM')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "pd.date_range('5/2/2012 12:56:31', periods=5)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "pd.date_range('5/2/2012 12:56:31', periods=5, normalize=True)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "heading",
     "level": 3,
     "metadata": {},
     "source": [
      "Frequencies and Date Offsets"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "from pandas.tseries.offsets import Hour, Minute\n",
      "hour = Hour()\n",
      "hour"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "four_hours = Hour(4)\n",
      "four_hours"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "pd.date_range('1/1/2000', '1/3/2000 23:59', freq='4h')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "Hour(2) + Minute(30)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "pd.date_range('1/1/2000', periods=10, freq='1h30min')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "heading",
     "level": 4,
     "metadata": {},
     "source": [
      "Week of month dates"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "rng = pd.date_range('1/1/2012', '9/1/2012', freq='WOM-3FRI')\n",
      "list(rng)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "heading",
     "level": 3,
     "metadata": {},
     "source": [
      "Shifting (leading and lagging) data"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ts = Series(np.random.randn(4),\n",
      "            index=pd.date_range('1/1/2000', periods=4, freq='M'))\n",
      "ts"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ts.shift(2)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ts.shift(-2)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "raw",
     "metadata": {},
     "source": [
      "ts / ts.shift(1) - 1"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ts.shift(2, freq='M')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ts.shift(3, freq='D')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ts.shift(1, freq='3D')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ts.shift(1, freq='90T')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "heading",
     "level": 4,
     "metadata": {},
     "source": [
      "Shifting dates with offsets"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "from pandas.tseries.offsets import Day, MonthEnd\n",
      "now = datetime(2011, 11, 17)\n",
      "now + 3 * Day()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "now + MonthEnd()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "now + MonthEnd(2)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "offset = MonthEnd()\n",
      "offset.rollforward(now)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "offset.rollback(now)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ts = Series(np.random.randn(20),\n",
      "            index=pd.date_range('1/15/2000', periods=20, freq='4d'))\n",
      "ts.groupby(offset.rollforward).mean()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ts.resample('M', how='mean')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "heading",
     "level": 2,
     "metadata": {},
     "source": [
      "Time Zone Handling"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import pytz\n",
      "pytz.common_timezones[-5:]"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "tz = pytz.timezone('US/Eastern')\n",
      "tz"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "heading",
     "level": 3,
     "metadata": {},
     "source": [
      "Localization and Conversion"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "rng = pd.date_range('3/9/2012 9:30', periods=6, freq='D')\n",
      "ts = Series(np.random.randn(len(rng)), index=rng)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "print(ts.index.tz)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "pd.date_range('3/9/2012 9:30', periods=10, freq='D', tz='UTC')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ts_utc = ts.tz_localize('UTC')\n",
      "ts_utc"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ts_utc.index"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ts_utc.tz_convert('US/Eastern')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ts_eastern = ts.tz_localize('US/Eastern')\n",
      "ts_eastern.tz_convert('UTC')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ts_eastern.tz_convert('Europe/Berlin')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ts.index.tz_localize('Asia/Shanghai')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "heading",
     "level": 3,
     "metadata": {},
     "source": [
      "Operations with time zone-aware Timestamp objects"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "stamp = pd.Timestamp('2011-03-12 04:00')\n",
      "stamp_utc = stamp.tz_localize('utc')\n",
      "stamp_utc.tz_convert('US/Eastern')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "stamp_moscow = pd.Timestamp('2011-03-12 04:00', tz='Europe/Moscow')\n",
      "stamp_moscow"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "stamp_utc.value"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "stamp_utc.tz_convert('US/Eastern').value"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# 30 minutes before DST transition\n",
      "from pandas.tseries.offsets import Hour\n",
      "stamp = pd.Timestamp('2012-03-12 01:30', tz='US/Eastern')\n",
      "stamp"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "stamp + Hour()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# 90 minutes before DST transition\n",
      "stamp = pd.Timestamp('2012-11-04 00:30', tz='US/Eastern')\n",
      "stamp"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "stamp + 2 * Hour()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "heading",
     "level": 3,
     "metadata": {},
     "source": [
      "Operations between different time zones"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "rng = pd.date_range('3/7/2012 9:30', periods=10, freq='B')\n",
      "ts = Series(np.random.randn(len(rng)), index=rng)\n",
      "ts"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ts1 = ts[:7].tz_localize('Europe/London')\n",
      "ts2 = ts1[2:].tz_convert('Europe/Moscow')\n",
      "result = ts1 + ts2\n",
      "result.index"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "heading",
     "level": 2,
     "metadata": {},
     "source": [
      "Periods and Period Arithmetic"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "p = pd.Period(2007, freq='A-DEC')\n",
      "p"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "p + 5"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "p - 2"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "pd.Period('2014', freq='A-DEC') - p"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "rng = pd.period_range('1/1/2000', '6/30/2000', freq='M')\n",
      "rng"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "Series(np.random.randn(6), index=rng)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "values = ['2001Q3', '2002Q2', '2003Q1']\n",
      "index = pd.PeriodIndex(values, freq='Q-DEC')\n",
      "index"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "heading",
     "level": 3,
     "metadata": {},
     "source": [
      "Period Frequency Conversion"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "p = pd.Period('2007', freq='A-DEC')\n",
      "p.asfreq('M', how='start')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "p.asfreq('M', how='end')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "p = pd.Period('2007', freq='A-JUN')\n",
      "p.asfreq('M', 'start')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "p.asfreq('M', 'end')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "p = pd.Period('Aug-2007', 'M')\n",
      "p.asfreq('A-JUN')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "rng = pd.period_range('2006', '2009', freq='A-DEC')\n",
      "ts = Series(np.random.randn(len(rng)), index=rng)\n",
      "ts"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ts.asfreq('M', how='start')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ts.asfreq('B', how='end')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "heading",
     "level": 3,
     "metadata": {},
     "source": [
      "Quarterly period frequencies"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "p = pd.Period('2012Q4', freq='Q-JAN')\n",
      "p"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "p.asfreq('D', 'start')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "p.asfreq('D', 'end')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "p4pm = (p.asfreq('B', 'e') - 1).asfreq('T', 's') + 16 * 60\n",
      "p4pm"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "p4pm.to_timestamp()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "rng = pd.period_range('2011Q3', '2012Q4', freq='Q-JAN')\n",
      "ts = Series(np.arange(len(rng)), index=rng)\n",
      "ts"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "new_rng = (rng.asfreq('B', 'e') - 1).asfreq('T', 's') + 16 * 60\n",
      "ts.index = new_rng.to_timestamp()\n",
      "ts"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "heading",
     "level": 3,
     "metadata": {},
     "source": [
      "Converting Timestamps to Periods (and back)"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "rng = pd.date_range('1/1/2000', periods=3, freq='M')\n",
      "ts = Series(randn(3), index=rng)\n",
      "pts = ts.to_period()\n",
      "ts"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "pts"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "rng = pd.date_range('1/29/2000', periods=6, freq='D')\n",
      "ts2 = Series(randn(6), index=rng)\n",
      "ts2.to_period('M')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "pts = ts.to_period()\n",
      "pts"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "pts.to_timestamp(how='end')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "heading",
     "level": 3,
     "metadata": {},
     "source": [
      "Creating a PeriodIndex from arrays"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "data = pd.read_csv('ch08/macrodata.csv')\n",
      "data.year"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "data.quarter"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "index = pd.PeriodIndex(year=data.year, quarter=data.quarter, freq='Q-DEC')\n",
      "index"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "data.index = index\n",
      "data.infl"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "heading",
     "level": 2,
     "metadata": {},
     "source": [
      "Resampling and Frequency Conversion"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "rng = pd.date_range('1/1/2000', periods=100, freq='D')\n",
      "ts = Series(randn(len(rng)), index=rng)\n",
      "ts.resample('M', how='mean')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ts.resample('M', how='mean', kind='period')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "heading",
     "level": 3,
     "metadata": {},
     "source": [
      "Downsampling"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "rng = pd.date_range('1/1/2000', periods=12, freq='T')\n",
      "ts = Series(np.arange(12), index=rng)\n",
      "ts"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ts.resample('5min', how='sum')\n",
      "# note: output changed (the defaults changed from closed='right', label='right' to closed='left', label='left')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ts.resample('5min', how='sum', closed='left')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ts.resample('5min', how='sum', closed='left', label='left')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ts.resample('5min', how='sum', loffset='-1s')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "heading",
     "level": 4,
     "metadata": {},
     "source": [
      "Open-High-Low-Close (OHLC) resampling"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ts.resample('5min', how='ohlc')\n",
      "# note: output changed because of changed defaults"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "heading",
     "level": 4,
     "metadata": {},
     "source": [
      "Resampling with GroupBy"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "rng = pd.date_range('1/1/2000', periods=100, freq='D')\n",
      "ts = Series(np.arange(100), index=rng)\n",
      "ts.groupby(lambda x: x.month).mean()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ts.groupby(lambda x: x.weekday).mean()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "heading",
     "level": 3,
     "metadata": {},
     "source": [
      "Upsampling and interpolation"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "frame = DataFrame(np.random.randn(2, 4),\n",
      "                  index=pd.date_range('1/1/2000', periods=2, freq='W-WED'),\n",
      "                  columns=['Colorado', 'Texas', 'New York', 'Ohio'])\n",
      "frame"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "df_daily = frame.resample('D')\n",
      "df_daily"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "frame.resample('D', fill_method='ffill')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "frame.resample('D', fill_method='ffill', limit=2)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "frame.resample('W-THU', fill_method='ffill')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "heading",
     "level": 3,
     "metadata": {},
     "source": [
      "Resampling with periods"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "frame = DataFrame(np.random.randn(24, 4),\n",
      "                  index=pd.period_range('1-2000', '12-2001', freq='M'),\n",
      "                  columns=['Colorado', 'Texas', 'New York', 'Ohio'])\n",
      "frame[:5]"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "annual_frame = frame.resample('A-DEC', how='mean')\n",
      "annual_frame"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Q-DEC: Quarterly, year ending in December\n",
      "annual_frame.resample('Q-DEC', fill_method='ffill')\n",
      "# note: output changed: the default changed from convention='end' to convention='start',\n",
      "# and 'start' itself changed to be span-like; this also applies to the following cells"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "annual_frame.resample('Q-DEC', fill_method='ffill', convention='start')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "annual_frame.resample('Q-MAR', fill_method='ffill')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "heading",
     "level": 2,
     "metadata": {},
     "source": [
      "Time series plotting"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "close_px_all = pd.read_csv('ch09/stock_px.csv', parse_dates=True, index_col=0)\n",
      "close_px = close_px_all[['AAPL', 'MSFT', 'XOM']]\n",
      "close_px = close_px.resample('B', fill_method='ffill')\n",
      "close_px.info()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "close_px['AAPL'].plot()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "close_px.ix['2009'].plot()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "close_px['AAPL'].ix['01-2011':'03-2011'].plot()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "appl_q = close_px['AAPL'].resample('Q-DEC', fill_method='ffill')\n",
      "appl_q.ix['2009':].plot()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "heading",
     "level": 2,
     "metadata": {},
     "source": [
      "Moving window functions"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "close_px = close_px.asfreq('B').fillna(method='ffill')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "close_px.AAPL.plot()\n",
      "pd.rolling_mean(close_px.AAPL, 250).plot()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "plt.figure()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "appl_std250 = pd.rolling_std(close_px.AAPL, 250, min_periods=10)\n",
      "appl_std250[5:12]"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "appl_std250.plot()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Define expanding mean in terms of rolling_mean: a window as long as the\n",
      "# series itself combined with min_periods=1 averages all observations so far.\n",
      "# rolling_mean is reached through the pd namespace, as in the other cells;\n",
      "# the bare name is never imported and would raise NameError when called.\n",
      "expanding_mean = lambda x: pd.rolling_mean(x, len(x), min_periods=1)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "pd.rolling_mean(close_px, 60).plot(logy=True)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "plt.close('all')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "heading",
     "level": 3,
     "metadata": {},
     "source": [
      "Exponentially-weighted functions"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Compare a 60-period simple moving average with a span-60 EWMA of AAPL, 2005-2009\n",
      "aapl_px = close_px.AAPL['2005':'2009']\n",
      "ma60 = pd.rolling_mean(aapl_px, 60, min_periods=50)\n",
      "ewma60 = pd.ewma(aapl_px, span=60)\n",
      "\n",
      "fig, axes = plt.subplots(2, 1, sharex=True, sharey=True, figsize=(12, 7))\n",
      "\n",
      "# Each panel: the price as a solid line, its smoothed version dashed\n",
      "for panel, smoothed in zip(axes, (ma60, ewma60)):\n",
      "    aapl_px.plot(style='k-', ax=panel)\n",
      "    smoothed.plot(style='k--', ax=panel)\n",
      "\n",
      "axes[0].set_title('Simple MA')\n",
      "axes[1].set_title('Exponentially-weighted MA')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "heading",
     "level": 3,
     "metadata": {},
     "source": [
      "Binary moving window functions"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# SPX column taken from the full price table (close_px_all), not from the\n",
      "# three-stock close_px subset built earlier.\n",
      "spx_px = close_px_all['SPX']"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "spx_rets = spx_px / spx_px.shift(1) - 1\n",
      "returns = close_px.pct_change()\n",
      "corr = pd.rolling_corr(returns.AAPL, spx_rets, 125, min_periods=100)\n",
      "corr.plot()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "corr = pd.rolling_corr(returns, spx_rets, 125, min_periods=100)\n",
      "corr.plot()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "heading",
     "level": 3,
     "metadata": {},
     "source": [
      "User-defined moving window functions"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "from scipy.stats import percentileofscore\n",
      "score_at_2percent = lambda x: percentileofscore(x, 0.02)\n",
      "result = pd.rolling_apply(returns.AAPL, 250, score_at_2percent)\n",
      "result.plot()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "heading",
     "level": 2,
     "metadata": {},
     "source": [
      "Performance and Memory Usage Notes"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "rng = pd.date_range('1/1/2000', periods=10000000, freq='10ms')\n",
      "ts = Series(np.random.randn(len(rng)), index=rng)\n",
      "ts"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ts.resample('15min', how='ohlc').info()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "%timeit ts.resample('15min', how='ohlc')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "rng = pd.date_range('1/1/2000', periods=10000000, freq='1s')\n",
      "ts = Series(np.random.randn(len(rng)), index=rng)\n",
      "%timeit ts.resample('15s', how='ohlc')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    }
   ],
   "metadata": {}
  }
 ]
}